/*
PROGRAM BY: Teresa Fort
Last edited 04/23/2020

This program makes a firm-country-hs10 level dataset that distinguishes imports by type

It flags imports as inputs or produced goods using industry or firm-level information;


*/ 

%include 'yyyy/pdata.sas';

options obs=max;
    

libname ecroot 'xxxx/data';
%let input=xxxx/input ;

%let lbdvers=c201601 ;
%let output=xxxx/data ;




%macro make_import_type(y);
*1. Bring in the Trade data;
*************************************;
data imports;
    set pdata.imp_comb&y._&lbdvers. ;
    hs6=substr(hs,1,6);
    if related="Y" then rp_value=value ;
    *want to keep consumption imports only;
    if type=1 | type=2 | type=5;
    if firmid~=" " ;
run;
*************************************;
%if &y. = 1992 %then %do;
data firm_sic01;
   set ecroot.estabs_all_clean(keep=firmid sic_census emp_cen sales year rename=(sic_census=sic_cmf) );
    if year = &y.;
run;
    

*2. Aggregate to yearly level (easy to change to monthly level);
*************************************;
proc sort data=imports; by firmid hs country; run;

proc means sum noprint data=imports;
   by firmid hs country;
   var value rp_value;
   output out=imports02(drop=_type_ _freq_)  sum()= ;
run; 
*************************************;

  
  proc export data = imports02
    file = "&output/temp_imports.dta"
    dbms=stata replace;
run;

*3. Firm-by-industry-hs-country (repeat import values by naics);
*************************************;
*a) Make a firmid-naics dataset;
************;


proc sort data=firm_sic01; by firmid sic_cmf; run;

proc means sum noprint data=firm_sic01;
   by firmid sic_cmf;
   var emp_cen sales;
   output out=firm_sic02(drop=_freq_ _type_) sum()=;
run;
***********;

*b) Joinby to naics-hs bridges; 
***********;
 *i) input codes;
 proc sql ;
  create table firm_sic_hs_in as 
  select A.firmid, A.sic_cmf, B.*
  from firm_sic02 A left join ecroot.sic_out_hs_in&y. B 
    on A.sic_cmf=B.sic_cmf;
 quit;
 
 *ii) output codes;
  proc sql ;
  create table firm_sic_hs_prod as 
  select A.firmid, A.sic_cmf, B.*
  from firm_sic02 A left join ecroot.sic_out_hs_prod&y. B 
    on A.sic_cmf=B.sic_cmf;
 quit;
***********;

*c) Collapse to firmid-hs level and rename variables for merge;
*************************************;
 %macro ctype(t); 
 proc sort data=firm_sic_hs_&t; by firmid hs; run;
 proc means sum noprint data=firm_sic_hs_&t;
    by firmid hs;
    var mlevel: ;
    output out=firm_sic_hs_&t._02(drop=_type_) sum()=;
 run;

 data firm_sic_hs_&t._03(drop=mlevel:) ;
    set firm_sic_hs_&t._02(drop=_freq_);
    %macro lev();
      %do i=3 %to 6 ;
      sic_&t._l&i=mlevel_&i ;
      label sic_&t._l&i="Merge level of &t code to HS" ;
      %end;
    %mend;
      %lev;
 run;   
 %mend;
 %ctype(in);
 %ctype(prod);
*************************************;


*4. Firm-hs-country dataset merged by firmid-hs for inuts and prods;
*************************************;
proc sort data=ecroot.firm_hs_inputs&y. out=firm_hs_inputs; by firmid hs ; run;
proc sort data=ecroot.firm_hs_prods&y. out=firm_hs_prods; by firmid hs ; run;

data firm_flags;
   merge  imports02		(in=a) 
	  firm_hs_inputs    	(in=b) 
	  firm_hs_prods		(in=c rename=(mlevel_4=plevel_4 mlevel_3=plevel_3))
	  firm_sic_hs_in_03   (in=d)
	  firm_sic_hs_prod_03 (in=e)
	  ;
   by firmid hs ;
   if b=1 then f_input=1;
   if c=1 then f_prod=1;
   if d=1 then n_input=1;
   if e=1 then n_prod=1;
   if a=1 ;
run;
*************************************;

*5. Add manuf, short form, and mat-prod data indicators;
*************************************;
proc sort nodupkey data=ecroot.cmf_mat_match&y. out=in_mat  (keep=firmid); by firmid; run;
proc sort nodupkey data=ecroot.cmf_prod_match&y. out=in_prod(keep=firmid); by firmid; run;

data estabs_clean_&y.;
    set ecroot.estabs_all_clean;
    if year = &y.;
run;
    
proc sort data=estabs_clean_&y. out=cmf(keep=firmid ar); by firmid; run;

/*
data cmf2;
  set cmf;
  if shortform=1 then short=1;
  if shortform=0 then short=0;
run;  
*/
proc means sum noprint data=cmf; 
   by firmid;
   var ar; 
output out=cmf3(drop=_freq_ _type_ ) sum()= ;
run;

data firm_flags2 ;
  merge firm_flags(in=a) cmf3(in=b) in_mat(in=c) in_prod(in=d);
  by firmid;
  if b=1 then in_cmf=1;
  if c=1 then in_mat=1;
  if d=1 then in_prod=1;
  if a=1;
run;

*************************************;

*6. Output final dataset;
*************************************;
proc sort data=firm_flags2; by firmid hs country; run;

data firm_imports_type&y.;
    set firm_flags2;
    year = &y.;
run;
%end;
    
%if &y. >= 1997 %then %do;
    
%if &y. = 1997 %then %do;
data firm_naics01;
   set ecroot.estabs_all_clean(keep=firmid naics97_census emp_cen sales year rename=(naics97_census=naics_cmf) );
    if year = &y.;
run;
%end;
    
%if &y. = 2002 %then %do;
data firm_naics01;
   set ecroot.estabs_all_clean(keep=firmid naics02_census emp_cen sales year rename=(naics02_census=naics_cmf) );
    if year = &y.;
run;   
%end;
    
%if &y. = 2007 %then %do;
data firm_naics01;
   set ecroot.estabs_all_clean(keep=firmid naics07_census emp_cen sales year rename=(naics07_census=naics_cmf) );
    if year = &y.;
run;   
%end;
    
%if &y. = 2012 %then %do;
data firm_naics01;
   set ecroot.estabs_all_clean(keep=firmid naics12_census emp_cen sales year rename=(naics12_census=naics_cmf) );
    if year = &y.;
run;   
%end;



*2. Aggregate to yearly level (easy to change to monthly level);
*************************************;
proc sort data=imports; by firmid hs country; run;

proc means sum noprint data=imports;
   by firmid hs country;
   var value rp_value;
   output out=imports02(drop=_type_ _freq_)  sum()= ;
run; 
*************************************;

  
  proc export data = imports02
    file = "&output/temp_imports.dta"
    dbms=stata replace;
run;

*3. Firm-by-industry-hs-country (repeat import values by naics);
*************************************;
*a) Make a firmid-naics dataset;
************;


proc sort data=firm_naics01; by firmid naics_cmf; run;

proc means sum noprint data=firm_naics01;
   by firmid naics_cmf;
   var emp_cen sales;
   output out=firm_naics02(drop=_freq_ _type_) sum()=;
run;
***********;

*b) Joinby to naics-hs bridges; 
***********;
 *i) input codes;
 proc sql ;
  create table firm_naics_hs_in as 
  select A.firmid, A.naics_cmf, B.*
  from firm_naics02 A left join ecroot.naics_out_hs_in&y. B 
    on A.naics_cmf=B.naics_cmf;
 quit;
 
 *ii) output codes;
  proc sql ;
  create table firm_naics_hs_prod as 
  select A.firmid, A.naics_cmf, B.*
  from firm_naics02 A left join ecroot.naics_out_hs_prod&y. B 
    on A.naics_cmf=B.naics_cmf;
 quit;
***********;

*c) Collapse to firmid-hs level and rename variables for merge;
*************************************;
 %macro ctype(t); 
 proc sort data=firm_naics_hs_&t; by firmid hs; run;
 proc means sum noprint data=firm_naics_hs_&t;
    by firmid hs;
    var mlevel: ;
    output out=firm_naics_hs_&t._02(drop=_type_) sum()=;
 run;

 data firm_naics_hs_&t._03(drop=mlevel:) ;
    set firm_naics_hs_&t._02(drop=_freq_);
    %macro lev();
      %do i=3 %to 6 ;
      naics_&t._l&i=mlevel_&i ;
      label naics_&t._l&i="Merge level of &t code to HS" ;
      %end;
    %mend;
      %lev;
 run;   
 %mend;
 %ctype(in);
 %ctype(prod);
*************************************;


*4. Firm-hs-country dataset merged by firmid-hs for inuts and prods;
*************************************;
proc sort data=ecroot.firm_hs_inputs&y. out=firm_hs_inputs; by firmid hs ; run;
proc sort data=ecroot.firm_hs_prods&y. out=firm_hs_prods; by firmid hs ; run;

data firm_flags;
   merge  imports02		(in=a) 
	  firm_hs_inputs    	(in=b) 
	  firm_hs_prods		(in=c rename=(mlevel_6=plevel_6 mlevel_5=plevel_5 mlevel_4=plevel_4 mlevel_3=plevel_3))
	  firm_naics_hs_in_03   (in=d)
	  firm_naics_hs_prod_03 (in=e)
	  ;
   by firmid hs ;
   if b=1 then f_input=1;
   if c=1 then f_prod=1;
   if d=1 then n_input=1;
   if e=1 then n_prod=1;
   if a=1 ;
run;
*************************************;

*5. Add manuf, short form, and mat-prod data indicators;
*************************************;
proc sort nodupkey data=ecroot.cmf_mat_match&y. out=in_mat  (keep=firmid); by firmid; run;
proc sort nodupkey data=ecroot.cmf_prod_match&y. out=in_prod(keep=firmid); by firmid; run;

data estabs_clean_&y.;
    set ecroot.estabs_all_clean;
    if year = &y.;
run;
    
proc sort data=estabs_clean_&y. out=cmf(keep=firmid ar); by firmid; run;

/*
data cmf2;
  set cmf;
  if shortform=1 then short=1;
  if shortform=0 then short=0;
run;  
*/
proc means sum noprint data=cmf; 
   by firmid;
   var ar; 
output out=cmf3(drop=_freq_ _type_ ) sum()= ;
run;

data firm_flags2 ;
  merge firm_flags(in=a) cmf3(in=b) in_mat(in=c) in_prod(in=d);
  by firmid;
  if b=1 then in_cmf=1;
  if c=1 then in_mat=1;
  if d=1 then in_prod=1;
  if a=1;
run;

*************************************;


proc sort data=firm_flags2; by firmid hs country; run;

data firm_imports_type&y.;
    set firm_flags2;
    year = &y.;
run;
%end;
%mend;
    
%macro loop1();
    %do y=2007 %to 2007 %by 5;
        %make_import_type(&y.);
    %end;
%mend;
%loop1();
    
data ecroot.firm_imports_type_all;
    set firm_imports_type2007 ;
run;
    
*6. Output final dataset;
*************************************; 
proc export data = ecroot.firm_imports_type_all
    file = "&output/firm_imports_type_all.dta"
    dbms=stata replace;
run;
*************************************; 

/*  

Code for all years
%macro loop1();
    %do y=1992 %to 2012 %by 5;
        %make_import_type(&y.);
    %end;
%mend;
%loop1();
    
data ecroot.firm_imports_type_all;
    set firm_imports_type1992 firm_imports_type1997 firm_imports_type2002 firm_imports_type2007 firm_imports_type2012;
run;
    
*6. Output final dataset;
*************************************; 
proc export data = ecroot.firm_imports_type_all
    file = "&output/firm_imports_type_all.dta"
    dbms=stata replace;
run;
*************************************; 
*/